## Replication code for "Mayoral Partisanship and Municipal Fiscal Policy"
## Data descriptives for cities
## Chris Warshaw and Justin de Benedictis-Kessner
### Put all files into the same working directory

## This code produces Figure 1 (map), and the descriptive table in Appendix A.

library(xtable)
a <- data2
a <- a[!duplicated(a$fips.x),]
nrow(a) # 203
mean(a$population_est) # 322728.2
sd(a$population_est) # 671322
## regions:
# west
# south
# north
northeast <- c(09,23,25,33,44,50,34,36,42,17,18,26,39,55,19,20,27,29,31,38,46)
south <- c(10,11,12,13,24,37,45,51,54,01,21,28,47,05,22,40,48)
west <- c(04,08,16,30,32,35,49,56,02,06,15,41,53)
mean(a$fips_state %in% west) # 0.281
sd(a$fips_state %in% west) # 0.450
mean(a$fips_state %in% south) # 0.320
sd(a$fips_state %in% south) # 0.468
mean(a$fips_state %in% northeast) # 0.399
sd(a$fips_state %in% northeast) # 0.491

demog <- read.csv("place_demog.csv") # from Social Explorer: http://www.socialexplorer.com/tables/C2000/R10985227
a <- merge(a,demog,by.x="fips.x",by.y="Geo_FIPS",all.x=T)
## race:
# white alone: SE_T014_002
mean(a$SE_T014_002/a$SE_T014_001,na.rm=T) # 0.6528
sd(a$SE_T014_002/a$SE_T014_001,na.rm=T) # 0.1794

# black alone: SE_T014_003
mean(a$SE_T014_003/a$SE_T014_001,na.rm=T) # 0.1897
sd(a$SE_T014_003/a$SE_T014_001,na.rm=T) # 0.1839

## educ:
# college degree or more: SE_T043_005
mean(a$SE_T043_005/a$SE_T014_001,na.rm=T) # 0.1625
sd(a$SE_T043_005/a$SE_T014_001,na.rm=T) # 0.0717

## income
# median household income 1999 dollars: SE_T093_001
mean(a$SE_T093_001,na.rm=T) # 39561.03
sd(a$SE_T093_001,na.rm=T) # 10864.79

## house value
# median house value - owner occupied? SE_T163_001
mean(a$SE_T163_001,na.rm=T) # 120466.7
sd(a$SE_T163_001,na.rm=T) # 60143.86

tab <- c(mean(a$population_est),sd(a$population_est),mean(a$fips_state %in% west),sd(a$fips_state %in% west),mean(a$fips_state %in% south),sd(a$fips_state %in% south),mean(a$fips_state %in% northeast),sd(a$fips_state %in% northeast),mean(a$SE_T014_002/a$SE_T014_001,na.rm=T),sd(a$SE_T014_002/a$SE_T014_001,na.rm=T),mean(a$SE_T014_003/a$SE_T014_001,na.rm=T),sd(a$SE_T014_003/a$SE_T014_001,na.rm=T),mean(a$SE_T043_005/a$SE_T014_001,na.rm=T),sd(a$SE_T043_005/a$SE_T014_001,na.rm=T),mean(a$SE_T093_001,na.rm=T),sd(a$SE_T093_001,na.rm=T),mean(a$SE_T163_001,na.rm=T),sd(a$SE_T163_001,na.rm=T),nrow(a))

# From F&G table:
allUS <- c( 7666, "(62732)", 0.12, "(0.33)", 0.24, "(0.43)", 0.23, "(0.42)", 0.88, "(0.20)", 0.05, "(0.14)", 0.17, "(0.13)", 46916,  "(19262)", 100526,"(86412)",34574)
over25k <- c(86245, "(255000)", 0.24 ,"(0.42)", 0.24, "(0.43)", 0.25, "(0.43)", 0.75, "(0.19)", 0.11, "(0.16)", 0.28, "(0.15)", 57927,"(19566)",156718,"(100769)",1893)

xtable(data.frame(ourdata=tab,allUS=allUS,over25k=over25k),digits=2)

## calculating the US and over 75k myself:
length(unique(demog$Geo_FIPS)) # 25375
# population:
mean(demog$SE_T014_001) # 8226.02
sd(demog$SE_T014_001) # 68367.65

# regions:
# mean(demog$Geo_REGION==2) # midwest
mean(demog$Geo_REGION==4) # west = 0.168
mean(demog$Geo_REGION==3) # south = 0.331
mean(demog$Geo_REGION==1) # northeast = 0.141

## race:
# white alone: SE_T014_002
mean(demog$SE_T014_002/demog$SE_T014_001,na.rm=T) # 0.848
sd(demog$SE_T014_002/demog$SE_T014_001,na.rm=T) # 0.212

# black alone: SE_T014_003
mean(demog$SE_T014_003/demog$SE_T014_001,na.rm=T) # 0.069
sd(demog$SE_T014_003/demog$SE_T014_001,na.rm=T) # 0.158
## educ:
# college degree or more: SE_T043_005
mean(demog$SE_T043_005/demog$SE_T014_001,na.rm=T) # 0.116
sd(demog$SE_T043_005/demog$SE_T014_001,na.rm=T) # 0.103

## income
# median household income 1999 dollars: SE_T093_001
mean(demog$SE_T093_001,na.rm=T) # 38519.86
sd(demog$SE_T093_001,na.rm=T) # 18804.84

## house value
# median house value - owner occupied? SE_T163_001
mean(demog$SE_T163_001,na.rm=T) # 95024.97
sd(demog$SE_T163_001,na.rm=T) # 92256.89

allUS <- c(mean(demog$SE_T014_001),sd(demog$SE_T014_001),mean(demog$Geo_REGION==4),sd(demog$Geo_REGION==4),mean(demog$Geo_REGION==3),sd(demog$Geo_REGION==3),mean(demog$Geo_REGION==1),sd(demog$Geo_REGION==1),mean(demog$SE_T014_002/demog$SE_T014_001,na.rm=T),sd(demog$SE_T014_002/demog$SE_T014_001,na.rm=T),mean(demog$SE_T014_003/demog$SE_T014_001,na.rm=T),sd(demog$SE_T014_003/demog$SE_T014_001,na.rm=T), mean(demog$SE_T043_005/demog$SE_T014_001,na.rm=T),sd(demog$SE_T043_005/demog$SE_T014_001,na.rm=T),mean(demog$SE_T093_001,na.rm=T),sd(demog$SE_T093_001,na.rm=T),mean(demog$SE_T163_001,na.rm=T),sd(demog$SE_T163_001,na.rm=T),length(unique(demog$Geo_FIPS)))

big <- demog[demog$SE_T014_001>25000,]
bigger <- demog[demog$SE_T014_001>75000,]

over25k <- c(mean(big$SE_T014_001),sd(big$SE_T014_001),mean(big$Geo_STATE %in% west),sd(big$Geo_STATE %in% west),mean(big$Geo_STATE %in% south),sd(big$Geo_STATE %in% south),mean(big$Geo_STATE %in% northeast),sd(big$Geo_STATE %in% northeast),mean(big$SE_T014_002/big$SE_T014_001,na.rm=T),sd(big$SE_T014_002/big$SE_T014_001,na.rm=T),mean(big$SE_T014_003/big$SE_T014_001,na.rm=T),sd(big$SE_T014_003/big$SE_T014_001,na.rm=T), mean(big$SE_T043_005/big$SE_T014_001,na.rm=T),sd(big$SE_T043_005/big$SE_T014_001,na.rm=T),mean(big$SE_T093_001,na.rm=T),sd(big$SE_T093_001,na.rm=T),mean(big$SE_T163_001,na.rm=T),sd(big$SE_T163_001,na.rm=T),length(unique(big$Geo_FIPS)))

over75k <- c(mean(bigger$SE_T014_001),sd(bigger$SE_T014_001),mean(bigger$Geo_STATE %in% west),sd(bigger$Geo_STATE %in% west),mean(bigger$Geo_STATE %in% south),sd(bigger$Geo_STATE %in% south),mean(bigger$Geo_STATE %in% northeast),sd(bigger$Geo_STATE %in% northeast),mean(bigger$SE_T014_002/bigger$SE_T014_001,na.rm=T),sd(bigger$SE_T014_002/bigger$SE_T014_001,na.rm=T),mean(bigger$SE_T014_003/bigger$SE_T014_001,na.rm=T),sd(bigger$SE_T014_003/bigger$SE_T014_001,na.rm=T), mean(bigger$SE_T043_005/bigger$SE_T014_001,na.rm=T),sd(bigger$SE_T043_005/bigger$SE_T014_001,na.rm=T),mean(bigger$SE_T093_001,na.rm=T),sd(bigger$SE_T093_001,na.rm=T),mean(bigger$SE_T163_001,na.rm=T),sd(bigger$SE_T163_001,na.rm=T),length(unique(bigger$Geo_FIPS)))

names <- c("Population","" ,"% West","" ,"% South","" ,"% Northeast","" ,"% White","" ,"% Black","" ,"% College degree or more","" , "Median household income","" , "Median home value","" , "Number of cities")

xtable(data.frame(rownames=names,ourdata=tab,allUS=allUS,over25k=over25k,over75k=over75k),digits=2)

## what percentage of total US is in our sample?
# total population in 75k cities:
sum(demog$SE_T014_001) # 208,735,266
sum(bigger$SE_T014_001) # 88,154,997
sum(bigger$SE_T014_001)/sum(demog$SE_T014_001) # 0.422
sum(a$SE_T014_001,na.rm=T)/sum(demog$SE_T014_001) # 0.30595

head(a[order(a$SE_T014_001,decreasing=F),]) # find example cities

# checking significance of balance across sample/target population:
t.test(bigger$SE_T014_001,a$population_est) # p=0.1526
t.test(a$fips_state %in% west,bigger$Geo_STATE %in% west) # p=0.03481
t.test(a$fips_state %in% south,bigger$Geo_STATE %in% south) # p=0.7123
t.test(a$fips_state %in% northeast,bigger$Geo_STATE %in% northeast) # p=0.03514
t.test(a$SE_T014_002/a$SE_T014_001,bigger$SE_T014_002/bigger$SE_T014_001) # p=0.5694
t.test(a$SE_T014_003/a$SE_T014_001, bigger$SE_T014_003/bigger$SE_T014_001) # p=0.03829 % black
t.test(a$SE_T043_005/a$SE_T014_001,bigger$SE_T043_005/bigger$SE_T014_001) # p=0.4628
t.test(a$SE_T093_001,bigger$SE_T093_001) # p=0.008228 median income
t.test(a$SE_T163_001,bigger$SE_T163_001) # p=0.001413 median home value


#### Mapping:
library(maptools) # lots of good tools for manipulating maps
library(rgdal) # better for opening shapefiles
library(RgoogleMaps) # googlemaps API tool
library(classInt) # color scales
# library(OpenStreetMap)
library(RColorBrewer) # color scales
library(ggplot2); library(ggmap) # ggplot addition for fancy maps
library(maps);library(mapdata)

a$fulladdress <- paste(a$city,a$STATEAB,sep=", ")
a$lon <- NA
a$lat <- NA
a[,c("lon","lat")] <- geocode(a$fulladdress)

bounds <- c(left=(mean(a$lon)-(mean(a$lon)-min(a$lon))*1.4),right=(mean(a$lon)+(max(a$lon)-mean(a$lon))*1.4),bottom=(mean(a$lat)-(mean(a$lat)-min(a$lat))*1.2),top=(mean(a$lat)+(max(a$lat)-mean(a$lat))*1.2))
# anchorage AK throwing off the bounding box, so eliminating:
bounds <- c(left=(mean(a$lon[a$STATEAB!="AK"])-(mean(a$lon[a$STATEAB!="AK"])-min(a$lon[a$STATEAB!="AK"]))*1.4),right=(mean(a$lon[a$STATEAB!="AK"])+(max(a$lon[a$STATEAB!="AK"])-mean(a$lon[a$STATEAB!="AK"]))*1.5),bottom=(mean(a$lat[a$STATEAB!="AK"])-(mean(a$lat[a$STATEAB!="AK"])-min(a$lat[a$STATEAB!="AK"]))*1.2),top=(mean(a$lat[a$STATEAB!="AK"])+(max(a$lat[a$STATEAB!="AK"])-mean(a$lat[a$STATEAB!="AK"]))*1.2))


# Problems with Alaska, Hawaii:
load(file="states_map.Rdata")
states2 <- spTransform(states, CRS("+proj=laea +lat_0=45 +lon_0=-100 +x_0=0 +y_0=0 +a=6370997 +b=6370997 +units=m +no_defs"))
states2@data$id <- rownames(states2@data)


alaska <- states2[states2$STATE_ABBR=="AK",]
alaska <- elide(alaska, rotate=-50)
alaska <- elide(alaska, bb=bbox(alaska), scale=max(apply(bbox(alaska), 1, diff)) / 2.3)
alaska <- elide(alaska, shift=c(-2100000, -2500000))
proj4string(alaska) <- proj4string(states2)

# extract, then rotate & shift hawaii
hawaii <- states2[states2$STATE_ABBR=="HI",]
hawaii <- elide(hawaii, rotate=-35)
hawaii <- elide(hawaii, shift=c(5400000, -1400000))
proj4string(hawaii) <- proj4string(states2)

states2 <- states2[!states2$STATE_ABBR %in% c("AK","HI"),]
states2 <- rbind(states2, alaska, hawaii)

plot(states2)

# do the same for city points:
b <- data.frame(lon=a$lon,lat=a$lat,city=a$city,state=a$STATEAB)
coordinates(b) <- ~ lon + lat
proj4string(b) <- CRS("+init=epsg:4326")
b <- spTransform(b,CRS("+proj=laea +lat_0=45 +lon_0=-100 +x_0=0 +y_0=0 +a=6370997 +b=6370997 +units=m +no_defs"))
anchorage <- b[b$state=="AK",]
anchorage <- elide(anchorage, rotate=-50)
anchorage <- elide(anchorage, bb=bbox(alaska), scale=max(apply(bbox(alaska), 1, diff)) / 2.3)
anchorage <- elide(anchorage, shift=c(-2100000, -2500000))

anchorage <- data.frame(lon=-1404540,lat=-2035876,city="anchorage",state="AK")
coordinates(anchorage) <- ~ lon + lat
proj4string(anchorage) <- proj4string(b)

b <- b[b$state!="AK",]
b <- rbind(b,anchorage)

b <- merge(b, a[,c("city","STATEAB","SE_T014_001")],by.x=c("city","state"),by.y=c("city","STATEAB"))

b$plotname <- NULL
for(i in 1:nrow(b)){
  b$plotname[i] <- ifelse(b$SE_T014_001[i]>=max(b$SE_T014_001[which(b$state==b$state[i])]),1,0)
  b$plotname[i] <- ifelse(b$SE_T014_001[i]>=500000,1,b$plotname[i])
}


## not plotting anchorage correctly... just going to eliminate AK and HI

pdf("Figures/cities_map4.pdf",height=10,width=13)
par(mar=c(0,0,0,0))
plot(states2[!(states2$STATE_ABBR %in% c("AK","HI")),],lwd=0.001,col="grey",lty=0)
points(b[b$state!="AK",],col="black",pch=".",cex=3)
text(b$lon[which(b$state!="AK" & b$plotname==1)],b$lat[which(b$state!="AK" & b$plotname==1)],labels=b$city[which(b$state!="AK" & b$plotname==1)],cex=0.5,pos=3,offset=0.2,col="black")
dev.off()

